-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[libc][math] Refactor atan2f implementation to header-only in src/__support/math folder. #150993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-libc Author: Muhammad Bassiouni (bassiounix) ChangesPart of #147386 in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450 Patch is 36.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150993.diff 10 Files Affected:
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 0605d918eb2af..527bb8d6214ae 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -24,6 +24,7 @@
#include "math/asinhf16.h"
#include "math/atan.h"
#include "math/atan2.h"
+#include "math/atan2f.h"
#include "math/atanf.h"
#include "math/atanf16.h"
#include "math/erff.h"
diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h
new file mode 100644
index 0000000000000..2de09d25e19f8
--- /dev/null
+++ b/libc/shared/math/atan2f.h
@@ -0,0 +1,23 @@
+//===-- Shared atan2f function ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index bbb07b62552f6..c197b19ed29de 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -213,6 +213,23 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atan2f
+ HDRS
+ atan2f_float.h
+ atan2f.h
+ DEPENDS
+ .inv_trigf_utils
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.macros.config
+ libc.src.__support.macros.optimization
+)
+
add_header_library(
atanf
HDRS
diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h
new file mode 100644
index 0000000000000..e3b19329126f4
--- /dev/null
+++ b/libc/src/__support/math/atan2f.h
@@ -0,0 +1,351 @@
+//===-- Implementation header for atan2f ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atan2f_float.h"
+
+#else
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atan2f_internal {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+static constexpr fputil::DoubleDouble ATAN_I[17] = {
+ {0.0, 0.0},
+ {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5},
+ {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4},
+ {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3},
+ {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3},
+ {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2},
+ {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2},
+ {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2},
+ {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2},
+ {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1},
+ {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1},
+ {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1},
+ {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1},
+ {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1},
+ {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1},
+ {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1},
+ {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1},
+};
+
+// Taylor polynomial, generated by Sollya with:
+// > for i from 0 to 8 do {
+// j = (-1)^i/(2*i + 1);
+// a = round(j, D, RN);
+// b = round(j - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+static constexpr fputil::DoubleDouble COEFFS[9] = {
+ {0.0, 1.0}, // 1
+ {-0x1.5555555555555p-56, -0x1.5555555555555p-2}, // -1/3
+ {-0x1.999999999999ap-57, 0x1.999999999999ap-3}, // 1/5
+ {-0x1.2492492492492p-57, -0x1.2492492492492p-3}, // -1/7
+ {0x1.c71c71c71c71cp-58, 0x1.c71c71c71c71cp-4}, // 1/9
+ {0x1.745d1745d1746p-59, -0x1.745d1745d1746p-4}, // -1/11
+ {-0x1.3b13b13b13b14p-58, 0x1.3b13b13b13b14p-4}, // 1/13
+ {-0x1.1111111111111p-60, -0x1.1111111111111p-4}, // -1/15
+ {0x1.e1e1e1e1e1e1ep-61, 0x1.e1e1e1e1e1e1ep-5}, // 1/17
+};
+
+// Veltkamp's splitting of a double precision value into hi + lo, where the hi
+// part is slightly smaller than an even split, so that the product of
+// hi * (s1 * k + s2) is exact,
+// where:
+// s1, s2 are single precision,
+// 1/16 <= s1/s2 <= 1
+// k is an integer multiple of 1/16 with 1/16 <= k <= 1.
+// So the maximal precision of (s1 * k + s2) is:
+// prec(s1 * k + s2) = 2 + log2(msb(s2)) - log2(lsb(k_d * s1))
+// = 2 + log2(msb(s1)) + 4 - log2(lsb(k_d)) - log2(lsb(s1))
+// = 2 + log2(lsb(s1)) + 23 + 4 - (-4) - log2(lsb(s1))
+// = 33.
+// Thus, the Veltkamp splitting constant is C = 2^33 + 1.
+// This is used when FMA instruction is not available.
+[[maybe_unused]] LIBC_INLINE static constexpr fputil::DoubleDouble
+split_d(double a) {
+ fputil::DoubleDouble r{0.0, 0.0};
+ constexpr double C = 0x1.0p33 + 1.0; // splitting constant 2^33 + 1
+ double t1 = C * a;
+ double t2 = a - t1;
+ r.hi = t1 + t2; // high part of a
+ r.lo = a - r.hi; // low part: what is left of a after removing hi
+ return r;
+}
+
+// Compute final_sign * (const_term + atan(num_d / den_d)) in double-double.
+// num_d = min(|x|, |y|)
+// den_d = max(|x|, |y|)
+// q_d = num_d / den_d (only read when idx == 0)
+// idx, k_d = round( 2^4 * num_d / den_d )
+// final_sign = sign of the final result
+// const_term = the constant term in the final expression.
+LIBC_INLINE static float
+atan2f_double_double(double num_d, double den_d, double q_d, int idx,
+ double k_d, double final_sign,
+ const fputil::DoubleDouble &const_term) {
+ fputil::DoubleDouble q;
+ double num_r = 0, den_r = 0;
+
+ if (idx != 0) {
+ // The following range reduction is accurate even without fma for
+ // 1/16 <= n/d <= 1.
+ // atan(n/d) - atan(idx/16) = atan((n/d - idx/16) / (1 + (n/d) * (idx/16)))
+ // = atan((n - d*(idx/16)) / (d + n*idx/16))
+ k_d *= 0x1.0p-4; // k_d now holds idx/16
+ num_r = fputil::multiply_add(k_d, -den_d, num_d); // Exact
+ den_r = fputil::multiply_add(k_d, num_d, den_d); // Exact
+ q.hi = num_r / den_r;
+ } else {
+ // For 0 < n/d < 1/16, we just need to calculate the lower part of their
+ // quotient.
+ q.hi = q_d;
+ num_r = num_d;
+ den_r = den_d;
+ }
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ q.lo = fputil::multiply_add(q.hi, -den_r, num_r) / den_r;
+#else
+ // Compute `(num_r - q.hi * den_r) / den_r` accurately without FMA
+ // instructions.
+ fputil::DoubleDouble q_hi_dd = split_d(q.hi);
+ double t1 = fputil::multiply_add(q_hi_dd.hi, -den_r, num_r); // Exact
+ double t2 = fputil::multiply_add(q_hi_dd.lo, -den_r, t1);
+ q.lo = t2 / den_r;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ // Taylor polynomial, evaluating using Horner's scheme:
+ // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15
+ // + x^17/17
+ // = x*(1 + x^2*(-1/3 + x^2*(1/5 + x^2*(-1/7 + x^2*(1/9 + x^2*
+ // (-1/11 + x^2*(1/13 + x^2*(-1/15 + x^2 * 1/17))))))))
+ fputil::DoubleDouble q2 = fputil::quick_mult(q, q); // q^2 in double-double
+ fputil::DoubleDouble p_dd =
+ fputil::polyeval(q2, COEFFS[0], COEFFS[1], COEFFS[2], COEFFS[3],
+ COEFFS[4], COEFFS[5], COEFFS[6], COEFFS[7], COEFFS[8]);
+ fputil::DoubleDouble r_dd =
+ fputil::add(const_term, fputil::multiply_add(q, p_dd, ATAN_I[idx])); // const_term + atan(idx/16) + q * P(q^2)
+ r_dd.hi *= final_sign;
+ r_dd.lo *= final_sign;
+
+ // Make sure the sum is normalized:
+ fputil::DoubleDouble rr = fputil::exact_add(r_dd.hi, r_dd.lo);
+ // Round to odd: fold a non-zero lo into hi when the low bits of hi are zero.
+ uint64_t rr_bits = cpp::bit_cast<uint64_t>(rr.hi);
+ if (LIBC_UNLIKELY(((rr_bits & 0xfff'ffff) == 0) && (rr.lo != 0.0))) {
+ Sign hi_sign = fputil::FPBits<double>(rr.hi).sign();
+ Sign lo_sign = fputil::FPBits<double>(rr.lo).sign();
+ if (hi_sign == lo_sign) {
+ ++rr_bits; // same sign: step the bit pattern of hi up by one
+ } else if ((rr_bits & fputil::FPBits<double>::FRACTION_MASK) > 0) {
+ --rr_bits; // opposite sign: step the bit pattern of hi down by one
+ }
+ }
+
+ return static_cast<float>(cpp::bit_cast<double>(rr_bits)); // final rounding to float
+}
+
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+} // namespace atan2f_internal
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follows:
+
+// * Range reduction 1: signedness
+// atan2(y, x) will return a number between -PI and PI representing the angle
+// formed by the Ox axis and the vector (x, y) on the Oxy-plane.
+// In particular, we have that:
+// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
+// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant)
+// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
+// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
+// Since atan function is odd, we can use the formula:
+// atan(-u) = -atan(u)
+// to adjust the above conditions a bit further:
+// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant)
+// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant)
+// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant)
+// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant)
+// Which can be simplified to:
+// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0
+// = sign(y) * (pi - atan( |y|/|x| )) if x < 0
+
+// * Range reduction 2: reciprocal
+// Now that the argument inside atan is positive, we can use the formula:
+// atan(1/x) = pi/2 - atan(x)
+// to make the argument inside atan <= 1 as follows:
+// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x
+// = sign(y) * (pi/2 - atan( |x|/|y| )) if 0 <= x < |y|
+// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x
+// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y|
+
+// * Range reduction 3: look up table.
+// After the previous two range reduction steps, we reduce the problem to
+// compute atan(u) with 0 <= u <= 1, or to be precise:
+// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|).
+// An accurate polynomial approximation for the whole [0, 1] input range will
+// require a very large degree. To make it more efficient, we reduce the input
+// range further by finding an integer idx such that:
+// | n/d - idx/16 | <= 1/32.
+// In particular,
+// idx := round(2^4 * n/d)
+// Then for the fast pass, we find a polynomial approximation for:
+// atan( n/d ) ~ atan( idx/16 ) + (n/d - idx/16) * Q(n/d - idx/16)
+// For the accurate pass, we use the addition formula:
+// atan( n/d ) - atan( idx/16 ) = atan( (n/d - idx/16)/(1 + (n*idx)/(16*d)) )
+// = atan( (n - d * idx/16)/(d + n * idx/16) )
+// And finally we use Taylor polynomial to compute the RHS in the accurate pass:
+// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 - u^11/11 + u^13/13
+// - u^15/15 + u^17/17
+// Its error in double-double precision is estimated in Sollya to be:
+// > P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15
+// + x^17/17;
+// > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]);
+// 0x1.aec6f...p-100
+// which is about rounding errors of double-double (2^-104).
+
+LIBC_INLINE static constexpr float atan2f(float y, float x) {
+ using namespace atan2f_internal;
+ using namespace inv_trigf_utils_internal;
+ using FPBits = typename fputil::FPBits<float>;
+ constexpr double IS_NEG[2] = {1.0, -1.0}; // [false] -> +1, [true] -> -1
+ constexpr double PI = 0x1.921fb54442d18p1;
+ constexpr double PI_LO = 0x1.1a62633145c07p-53; // low part paired with PI in CONST_ADJ
+ constexpr double PI_OVER_4 = 0x1.921fb54442d18p-1;
+ constexpr double PI_OVER_2 = 0x1.921fb54442d18p0;
+ constexpr double THREE_PI_OVER_4 = 0x1.2d97c7f3321d2p+1;
+ // Adjustment for constant term, stored as {lo, hi} pairs:
+ // CONST_ADJ[x_sign][y_sign][recip]
+ constexpr fputil::DoubleDouble CONST_ADJ[2][2][2] = {
+ {{{0.0, 0.0}, {-PI_LO / 2, -PI_OVER_2}},
+ {{-0.0, -0.0}, {-PI_LO / 2, -PI_OVER_2}}},
+ {{{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}},
+ {{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}}};
+
+ FPBits x_bits(x), y_bits(y);
+ bool x_sign = x_bits.sign().is_neg();
+ bool y_sign = y_bits.sign().is_neg();
+ x_bits.set_sign(Sign::POS); // from here on x_bits holds |x|
+ y_bits.set_sign(Sign::POS); // from here on y_bits holds |y|
+ uint32_t x_abs = x_bits.uintval();
+ uint32_t y_abs = y_bits.uintval();
+ uint32_t max_abs = x_abs > y_abs ? x_abs : y_abs; // larger of the two bit patterns
+ uint32_t min_abs = x_abs <= y_abs ? x_abs : y_abs; // smaller of the two bit patterns
+ float num_f = FPBits(min_abs).get_val(); // numerator: the smaller magnitude
+ float den_f = FPBits(max_abs).get_val(); // denominator: the larger magnitude
+ double num_d = static_cast<double>(num_f);
+ double den_d = static_cast<double>(den_f);
+
+ if (LIBC_UNLIKELY(max_abs >= 0x7f80'0000U || num_d == 0.0)) {
+ if (x_bits.is_nan() || y_bits.is_nan()) {
+ if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ double x_d = static_cast<double>(x);
+ double y_d = static_cast<double>(y);
+ size_t x_except = (x_d == 0.0) ? 0 : (x_abs == 0x7f80'0000 ? 2 : 1);
+ size_t y_except = (y_d == 0.0) ? 0 : (y_abs == 0x7f80'0000 ? 2 : 1);
+
+ // Exceptional cases:
+ // EXCEPTS[y_except][x_except][x_sign]
+ // with x_except & y_except:
+ // 0: zero
+ // 1: finite, non-zero
+ // 2: infinity
+ constexpr double EXCEPTS[3][3][2] = {
+ {{0.0, PI}, {0.0, PI}, {0.0, PI}},
+ {{PI_OVER_2, PI_OVER_2}, {0.0, 0.0}, {0.0, PI}},
+ {{PI_OVER_2, PI_OVER_2},
+ {PI_OVER_2, PI_OVER_2},
+ {PI_OVER_4, THREE_PI_OVER_4}},
+ };
+
+ double r = IS_NEG[y_sign] * EXCEPTS[y_except][x_except][x_sign]; // table value carries the sign of y
+
+ return static_cast<float>(r);
+ }
+
+ bool recip = x_abs < y_abs; // true when |x| < |y|: the quotient below is |x| / |y|
+ double final_sign = IS_NEG[(x_sign != y_sign) != recip];
+ fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
+ double q_d = num_d / den_d;
+
+ double k_d = fputil::nearest_integer(q_d * 0x1.0p4); // round(16 * num / den)
+ int idx = static_cast<int>(k_d); // lookup table index
+ double r = 0.0;
+
+#ifdef LIBC_MATH_HAS_SMALL_TABLES
+ double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
+ r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
+#else
+ q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d); // q_d now holds num/den - idx/16
+
+ double p = atan_eval(q_d, idx);
+ r = final_sign *
+ fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+#endif // LIBC_MATH_HAS_SMALL_TABLES
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return static_cast<float>(r);
+#else
+ constexpr uint32_t LOWER_ERR = 4;
+ // Mask sticky bits in double precision before rounding to single precision.
+ constexpr uint32_t MASK =
+ mask_trailing_ones<uint32_t, fputil::FPBits<double>::SIG_LEN -
+ FPBits::SIG_LEN - 1>();
+ constexpr uint32_t UPPER_ERR = MASK - LOWER_ERR;
+
+ uint32_t r_bits = static_cast<uint32_t>(cpp::bit_cast<uint64_t>(r)) & MASK;
+
+ // Ziv's rounding test.
+ if (LIBC_LIKELY(r_bits > LOWER_ERR && r_bits < UPPER_ERR))
+ return static_cast<float>(r); // sticky bits are clear of both error margins: keep the fast result
+
+ return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
+ const_term); // otherwise redo the evaluation in double-double
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
diff --git a/libc/src/math/generic/atan2f_float.h b/libc/src/__support/math/atan2f_float.h
similarity index 95%
rename from libc/src/math/generic/atan2f_float.h
rename to libc/src/__support/math/atan2f_float.h
index 1fd853d735950..fe7d57ab30f0b 100644
--- a/libc/src/math/generic/atan2f_float.h
+++ b/libc/src/__support/math/atan2f_float.h
@@ -1,4 +1,4 @@
-//===-- Single-precision atan2f function ----------------------------------===//
+//===-- Single-precision atan2f float function ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,18 +6,20 @@
//
//===----------------------------------------------------------------------===//
+#pragma once
+
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/atan2f.h"
namespace LIBC_NAMESPACE_DECL {
-namespace {
+namespace math {
+
+namespace atan2f_internal {
using FloatFloat = fputil::FloatFloat;
@@ -27,7 +29,7 @@ using FloatFloat = fputil::FloatFloat;
// b = round(atan(i/16) - a, SG, RN);
// print("{", b, ",", a, "},");
// };
-constexpr FloatFloat ATAN_I[17] = {
+static constexpr FloatFloat ATAN_I[17] = {
{0.0f, 0.0f},
{-0x1.1a6042p-30f, 0x1.ff55bcp-5f},
{-0x1.54f424p-30f, 0x1.fd5baap-4f},
@@ -57,7 +59,7 @@ constexpr FloatFloat ATAN_I[17] = {
// For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than
// ulp(x_hi^3 / 3) gives us:
// P(x) ~ x_hi - x_hi^3/3 + x_lo * (1 - x_hi^2)
-FloatFloat atan_eval(const FloatFloat &x) {
+LIBC_INLINE static constexpr FloatFloat atan_eval(const FloatFloat &x) {
FloatFloat p;
p.hi = x.hi;
float x_hi_sq = x.hi * x.hi;
@@ -70,7 +72,7 @@ FloatFloat atan_eval(const FloatFloat &x) {
return p;
}
-} // anonymous namespace
+} // namespace atan2f_internal
// There are several range reduction steps we can take for atan2(y, x) as
// follow:
@@ -121,7 +123,8 @@ FloatFloat atan_eval(const FloatFloat &x) {
// > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]);
// 0x1.995...p-28.
-LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
+LIBC_INLINE static constexpr float atan2f(float y, float x) {
+ using namespace atan2f_internal;
using FPBits = typename fputil::FPBits<float>;
constexpr float IS_NEG[2] = {1.0f, -1.0f};
constexpr FloatFloat ZERO = {0.0f, 0.0f};
@@ -234,4 +237,6 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
return final_sign * r.hi;
}
+} // namespace math
+
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 8071a0b013748..99c40bd442ddf 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4045,18 +4045,8 @@ add_entrypoint_object(
atan2f.cpp
HDRS
../atan2f.h
- atan2f_float.h
DEPENDS
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.double_double
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.macros.optimization
- libc.src.__support.math.inv_trigf_utils
+ libc.src.__support.math.atan2f
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
i...
[truncated]
|
079c38e
to
fd8651a
Compare
938716d
to
ffc1949
Compare
ffc1949
to
fb27c6b
Compare
29c47d3
to
bc90bfa
Compare
fb27c6b
to
546cd9d
Compare
bc90bfa
to
d411e78
Compare
546cd9d
to
a2300a6
Compare
d411e78
to
d2d3ef1
Compare
a2300a6
to
a87e1e1
Compare
e478ef9
to
9d59f53
Compare
9d59f53
to
08ea77f
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-format
08ea77f
to
6262577
Compare
Merge activity
|
4ae5f97
to
3c00550
Compare
…upport/math folder.
3c00550
to
d8c0dd1
Compare
Part of #147386
in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450